# No transformation required
## [1] 0.1919047
## [1] 4.102259e-05
# I don't like such strong transformations. Will use 0 instead
# Log-transform (Box-Cox with lambda = 0) and plot the result
enplanements %>%
  BoxCox(lambda = 0) %>%
  autoplot()
## [1] 0.1313326
# Read the retail data and build a monthly time series from column 4,
# starting April 1982
retaildata <- read.csv("retail.csv")
mytimeseries <- ts(retaildata[[4]], frequency = 12, start = c(1982, 4))

# Estimate the Box-Cox parameter, restricted to non-negative values
(lambda <- BoxCox.lambda(mytimeseries, lower = 0))
## [1] 4.102259e-05
## [1] 0.2548929
##
## Ljung-Box test
##
## data: Residuals from STL + ETS(M,N,N)
## Q* = 16.351, df = 6, p-value = 0.01199
##
## Model df: 2. Total lags used: 8
# Now try it on the training/test split
# Split into a training set (through Dec 2010) and a test set (2011 on)
train <- window(mytimeseries, end = c(2010, 12))
test <- window(mytimeseries, start = 2011)

# STL decomposition forecast over the full test horizon
fc <- stlf(train, h = length(test))

# Plot the training data, the forecast, and the held-out observations
autoplot(train) +
  autolayer(fc) +
  autolayer(test, series = "Tests")
## ME RMSE MAE MPE MAPE MASE
## Training set -0.2181675 8.577369 5.334122 -0.1992749 3.014946 0.2758386
## Test set 6.4831057 15.621518 11.655136 2.7094655 5.170648 0.6027114
## ACF1 Theil's U
## Training set -0.007305493 NA
## Test set 0.725258345 0.660509
## [1] 1
## [1] 2
## [1] 0
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,2,1)
## Q* = 10.562, df = 8, p-value = 0.2278
##
## Model df: 2. Total lags used: 10
## Series: train
## ARIMA(1,1,0)(1,1,0)[12]
## Box Cox transformation: lambda= 0
##
## Coefficients:
## ar1 sar1
## -0.2172 -0.3909
## s.e. 0.0539 0.0507
##
## sigma^2 estimated as 0.003085: log likelihood=488.63
## AIC=-971.26 AICc=-971.19 BIC=-959.85
## Series: train
## ARIMA(1,1,0)(2,1,2)[12]
## Box Cox transformation: lambda= 0
##
## Coefficients:
## ar1 sar1 sar2 sma1 sma2
## -0.2133 0.3171 -0.2293 -1.1526 0.2972
## s.e. 0.0544 0.1806 0.0671 0.1809 0.1623
##
## sigma^2 estimated as 0.002121: log likelihood=544.27
## AIC=-1076.55 AICc=-1076.29 BIC=-1053.72
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)(1,1,0)[12]
## Q* = 111.14, df = 22, p-value = 6.894e-14
##
## Model df: 2. Total lags used: 24
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)(2,1,2)[12]
## Q* = 28.895, df = 19, p-value = 0.06765
##
## Model df: 5. Total lags used: 24
## ETS(M,Ad,M)
##
## Call:
## ets(y = train)
##
## Smoothing parameters:
## alpha = 0.8031
## beta = 0.0049
## gamma = 1e-04
## phi = 0.9767
##
## Initial states:
## l = 63.945
## b = 0.2145
## s = 0.9895 0.9373 1.0084 1.1736 1.0097 1.0174
## 0.9782 0.9979 0.9931 0.9466 0.9759 0.9725
##
## sigma: 0.0457
##
## AIC AICc BIC
## 3383.427 3385.526 3452.611
# Fit six forecasting approaches and compare their test-set RMSE.
# etsmod, arimamod1 and arimamod2 are fitted earlier in the document.
# NOTE: the original source fused `f6 <- ...` and the start of `c(` onto
# one line, which is a parse error; they are separated here. The
# partially-matched seasonal='multi' is also spelled out in full.
f1 <- snaive(train, h = length(test))                 # seasonal naive benchmark
f2 <- hw(train, h = length(test), seasonal = "multiplicative")  # Holt-Winters
f3 <- forecast(etsmod, h = length(test))              # ETS model fitted above
f4 <- stlf(train, lambda = lambda, h = length(test))  # STL with Box-Cox
f5 <- forecast(arimamod1, h = length(test))           # first seasonal ARIMA
f6 <- forecast(arimamod2, h = length(test))           # second seasonal ARIMA

# Test-set RMSE for each method
c(
  SNaive    = accuracy(f1, test)["Test set", "RMSE"],
  HW        = accuracy(f2, test)["Test set", "RMSE"],
  ETS       = accuracy(f3, test)["Test set", "RMSE"],
  STLF      = accuracy(f4, test)["Test set", "RMSE"],
  ARIMAmod1 = accuracy(f5, test)["Test set", "RMSE"],
  ARIMAmod2 = accuracy(f6, test)["Test set", "RMSE"])
## SNaive HW ETS STLF ARIMAmod1 ARIMAmod2
## 23.40458 59.85277 25.43061 25.71131 134.02186 32.85748
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0)(2,1,0)[12]
## Q* = 26.616, df = 21, p-value = 0.1839
##
## Model df: 3. Total lags used: 24
# Forecast function for tsCV(): a fixed seasonal ARIMA(1,1,0)(2,1,2)
# with a log transform (lambda = 0)
arimafc <- function(y, h) {
  fit <- Arima(y, order = c(1, 1, 0), seasonal = c(2, 1, 2), lambda = 0)
  forecast(fit, h = h)
}

# Cross-validated errors at horizons 1-12; drop the first 14 rows
# before averaging, then take the square root of the mean squared errors
e <- tsCV(mytimeseries, arimafc, h = 12)
colMeans(tail(e^2, -14), na.rm = TRUE) %>% sqrt()
## h=1 h=2 h=3 h=4 h=5 h=6 h=7 h=8
## 10.07589 12.63643 15.19079 17.35334 19.29556 20.82580 22.48765 23.87344
## h=9 h=10 h=11 h=12
## 25.22672 26.58908 27.88643 28.88262
## [1] 21.6123
## Warning: Removed 39 rows containing missing values (geom_point).
# Forecast function for tsCV(): automatically selected ETS model
etsfc <- function(y, h) {
  forecast(ets(y), h = h)
}

# Forecast function for tsCV(): automatic ARIMA with a log transform
arimafc <- function(y, h) {
  forecast(auto.arima(y, lambda = 0), h = h)
}

# Cross-validated errors for both approaches at horizons 1-12
e1 <- tsCV(mytimeseries, etsfc, h = 12)
e2 <- tsCV(mytimeseries, arimafc, h = 12)

# Per-horizon mean squared errors (first 14 rows dropped), then the
# average RMSE across horizons for each method
MSE <- cbind(
  ETS = colMeans(tail(e1^2, -14), na.rm = TRUE),
  ARIMA = colMeans(tail(e2^2, -14), na.rm = TRUE))
colMeans(sqrt(MSE))
## ETS ARIMA
## 20.63463 24.66610